; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
; RUN: llc -O0 -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=gfx900 -mattr=-flat-for-global -verify-machineinstrs -stop-after=regallocfast < %s | FileCheck -check-prefixes=GCN %s

; Verify that we consider the xor at the end of the waterfall loop emitted for
; divergent indirect addressing as a terminator.

; Declaration of the llvm.amdgcn.workitem.id.x intrinsic. It is called in
; @extract_w_offset_vgpr below, where its i32 result (plus 1) is used as the
; extractelement index.
; NOTE(review): attribute group #1 is not defined in the visible part of this
; file -- confirm it is defined elsewhere.
declare i32 @llvm.amdgcn.workitem.id.x() #1

; There should be no spill code inserted between the xor and the real terminator
define amdgpu_kernel void @extract_w_offset_vgpr(i32 addrspace(1)* %out) {
  ; Kernel that extracts one element of a constant <16 x i32> vector using an
  ; index computed from the workitem id, then stores that element to %out.
  ; The check lines below are the autogenerated MIR expected when llc stops
  ; after regallocfast (see the RUN line at the top of the file); do not edit
  ; them by hand.
  ; GCN-LABEL: name: extract_w_offset_vgpr
  ; GCN: bb.0.entry:
  ; GCN:   successors: %bb.1(0x80000000)
  ; GCN:   liveins: $vgpr0, $sgpr0_sgpr1
  ; GCN:   SI_SPILL_V32_SAVE killed $vgpr0, %stack.3, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.3, addrspace 5)
  ; GCN:   renamable $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed renamable $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load (s64) from %ir.out.kernarg.offset.cast, align 4, addrspace 4)
  ; GCN:   renamable $sgpr6 = COPY renamable $sgpr1
  ; GCN:   renamable $sgpr0 = COPY renamable $sgpr0, implicit killed $sgpr0_sgpr1
  ; GCN:   renamable $sgpr4 = S_MOV_B32 61440
  ; GCN:   renamable $sgpr5 = S_MOV_B32 -1
  ; GCN:   undef renamable $sgpr0 = COPY killed renamable $sgpr0, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3
  ; GCN:   renamable $sgpr1 = COPY killed renamable $sgpr6
  ; GCN:   renamable $sgpr2 = COPY killed renamable $sgpr5
  ; GCN:   renamable $sgpr3 = COPY killed renamable $sgpr4
  ; GCN:   SI_SPILL_S128_SAVE killed $sgpr0_sgpr1_sgpr2_sgpr3, %stack.2, implicit $exec, implicit $sgpr32 :: (store (s128) into %stack.2, align 4, addrspace 5)
  ; GCN:   renamable $sgpr0 = S_MOV_B32 16
  ; GCN:   renamable $sgpr1 = S_MOV_B32 15
  ; GCN:   renamable $sgpr2 = S_MOV_B32 14
  ; GCN:   renamable $sgpr3 = S_MOV_B32 13
  ; GCN:   renamable $sgpr4 = S_MOV_B32 12
  ; GCN:   renamable $sgpr5 = S_MOV_B32 11
  ; GCN:   renamable $sgpr6 = S_MOV_B32 10
  ; GCN:   renamable $sgpr7 = S_MOV_B32 9
  ; GCN:   renamable $sgpr8 = S_MOV_B32 8
  ; GCN:   renamable $sgpr9 = S_MOV_B32 7
  ; GCN:   renamable $sgpr10 = S_MOV_B32 6
  ; GCN:   renamable $sgpr11 = S_MOV_B32 5
  ; GCN:   renamable $sgpr12 = S_MOV_B32 3
  ; GCN:   renamable $sgpr13 = S_MOV_B32 2
  ; GCN:   renamable $sgpr14 = S_MOV_B32 1
  ; GCN:   renamable $sgpr15 = S_MOV_B32 0
  ; GCN:   renamable $vgpr0 = COPY killed renamable $sgpr15
  ; GCN:   renamable $vgpr30 = COPY killed renamable $sgpr14
  ; GCN:   renamable $vgpr29 = COPY killed renamable $sgpr13
  ; GCN:   renamable $vgpr28 = COPY killed renamable $sgpr12
  ; GCN:   renamable $vgpr27 = COPY killed renamable $sgpr11
  ; GCN:   renamable $vgpr26 = COPY killed renamable $sgpr10
  ; GCN:   renamable $vgpr25 = COPY killed renamable $sgpr9
  ; GCN:   renamable $vgpr24 = COPY killed renamable $sgpr8
  ; GCN:   renamable $vgpr23 = COPY killed renamable $sgpr7
  ; GCN:   renamable $vgpr22 = COPY killed renamable $sgpr6
  ; GCN:   renamable $vgpr21 = COPY killed renamable $sgpr5
  ; GCN:   renamable $vgpr20 = COPY killed renamable $sgpr4
  ; GCN:   renamable $vgpr19 = COPY killed renamable $sgpr3
  ; GCN:   renamable $vgpr18 = COPY killed renamable $sgpr2
  ; GCN:   renamable $vgpr17 = COPY killed renamable $sgpr1
  ; GCN:   renamable $vgpr16 = COPY killed renamable $sgpr0
  ; GCN:   undef renamable $vgpr0 = COPY killed renamable $vgpr0, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
  ; GCN:   renamable $vgpr1 = COPY killed renamable $vgpr30
  ; GCN:   renamable $vgpr2 = COPY killed renamable $vgpr29
  ; GCN:   renamable $vgpr3 = COPY killed renamable $vgpr28
  ; GCN:   renamable $vgpr4 = COPY killed renamable $vgpr27
  ; GCN:   renamable $vgpr5 = COPY killed renamable $vgpr26
  ; GCN:   renamable $vgpr6 = COPY killed renamable $vgpr25
  ; GCN:   renamable $vgpr7 = COPY killed renamable $vgpr24
  ; GCN:   renamable $vgpr8 = COPY killed renamable $vgpr23
  ; GCN:   renamable $vgpr9 = COPY killed renamable $vgpr22
  ; GCN:   renamable $vgpr10 = COPY killed renamable $vgpr21
  ; GCN:   renamable $vgpr11 = COPY killed renamable $vgpr20
  ; GCN:   renamable $vgpr12 = COPY killed renamable $vgpr19
  ; GCN:   renamable $vgpr13 = COPY killed renamable $vgpr18
  ; GCN:   renamable $vgpr14 = COPY killed renamable $vgpr17
  ; GCN:   renamable $vgpr15 = COPY killed renamable $vgpr16
  ; GCN:   SI_SPILL_V512_SAVE killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, %stack.1, $sgpr32, 0, implicit $exec :: (store (s512) into %stack.1, align 4, addrspace 5)
  ; GCN:   renamable $sgpr0_sgpr1 = S_MOV_B64 $exec
  ; GCN:   SI_SPILL_S64_SAVE killed $sgpr0_sgpr1, %stack.0, implicit $exec, implicit $sgpr32 :: (store (s64) into %stack.0, align 4, addrspace 5)
  ; GCN:   renamable $vgpr0 = IMPLICIT_DEF
  ; GCN:   renamable $sgpr0_sgpr1 = IMPLICIT_DEF
  ; bb.1 lists itself as a successor, so it is the loop this test is about.
  ; In the expected output every spill save in bb.1 is listed before
  ; S_XOR_B64_term, and S_CBRANCH_EXECNZ is listed right after it.
  ; NOTE(review): these are plain check directives (no -NEXT suffix), so
  ; FileCheck alone would tolerate extra output between the xor and the branch;
  ; presumably -verify-machineinstrs is what rejects spill code placed there --
  ; confirm.
  ; GCN: bb.1:
  ; GCN:   successors: %bb.1(0x40000000), %bb.3(0x40000000)
  ; GCN:   $sgpr0_sgpr1 = SI_SPILL_S64_RESTORE %stack.4, implicit $exec, implicit $sgpr32 :: (load (s64) from %stack.4, align 4, addrspace 5)
  ; GCN:   $vgpr17 = SI_SPILL_V32_RESTORE %stack.5, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.5, addrspace 5)
  ; GCN:   $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = SI_SPILL_V512_RESTORE %stack.1, $sgpr32, 0, implicit $exec :: (load (s512) from %stack.1, align 4, addrspace 5)
  ; GCN:   $vgpr16 = SI_SPILL_V32_RESTORE %stack.3, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.3, addrspace 5)
  ; GCN:   renamable $sgpr2 = V_READFIRSTLANE_B32 $vgpr16, implicit $exec
  ; GCN:   renamable $sgpr0_sgpr1 = V_CMP_EQ_U32_e64 $sgpr2, $vgpr16, implicit $exec
  ; GCN:   renamable $sgpr0_sgpr1 = S_AND_SAVEEXEC_B64 killed renamable $sgpr0_sgpr1, implicit-def $exec, implicit-def dead $scc, implicit $exec
  ; GCN:   renamable $vgpr0 = V_INDIRECT_REG_READ_GPR_IDX_B32_V16 $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, killed $sgpr2, 11, implicit-def $m0, implicit $m0, implicit $exec
  ; GCN:   SI_SPILL_V32_SAVE $vgpr0, %stack.6, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.6, addrspace 5)
  ; GCN:   SI_SPILL_V32_SAVE killed $vgpr0, %stack.5, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.5, addrspace 5)
  ; GCN:   renamable $sgpr2_sgpr3 = COPY renamable $sgpr0_sgpr1
  ; GCN:   SI_SPILL_S64_SAVE killed $sgpr2_sgpr3, %stack.4, implicit $exec, implicit $sgpr32 :: (store (s64) into %stack.4, align 4, addrspace 5)
  ; GCN:   $exec = S_XOR_B64_term $exec, killed renamable $sgpr0_sgpr1, implicit-def dead $scc
  ; GCN:   S_CBRANCH_EXECNZ %bb.1, implicit $exec
  ; bb.3 reloads the value spilled to %stack.0 in bb.0 (a copy of $exec taken
  ; before the loop) and writes it back to $exec.
  ; GCN: bb.3:
  ; GCN:   successors: %bb.2(0x80000000)
  ; GCN:   $sgpr0_sgpr1 = SI_SPILL_S64_RESTORE %stack.0, implicit $exec, implicit $sgpr32 :: (load (s64) from %stack.0, align 4, addrspace 5)
  ; GCN:   $exec = S_MOV_B64 renamable $sgpr0_sgpr1
  ; GCN: bb.2:
  ; GCN:   $vgpr0 = SI_SPILL_V32_RESTORE %stack.6, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.6, addrspace 5)
  ; GCN:   $sgpr0_sgpr1_sgpr2_sgpr3 = SI_SPILL_S128_RESTORE %stack.2, implicit $exec, implicit $sgpr32 :: (load (s128) from %stack.2, align 4, addrspace 5)
  ; GCN:   BUFFER_STORE_DWORD_OFFSET killed renamable $vgpr0, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (store (s32) into %ir.out.load, addrspace 1)
  ; GCN:   S_ENDPGM 0
entry:
  ; The index is the workitem id plus one, so it is not a compile-time constant.
  %id = call i32 @llvm.amdgcn.workitem.id.x() #1
  %index = add i32 %id, 1
  ; The constant vector holds 0-3 and 5-16 (there is no 4); the S_MOV_B32
  ; immediates in the bb.0 checks above mirror exactly these values.
  %value = extractelement <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16>, i32 %index
  ; Write the selected element through the kernel's pointer argument.
  store i32 %value, i32 addrspace(1)* %out
  ret void
}
